% scribe: Lian Yu
% lastupdate: Oct. 12, 2005
% lecture: 11
% references: Durrett, section 2.2
% title: Background on Convergence in Distribution
% keywords: convergence in distribution, weak convergence, weak $\star$ convergence, weak convergence theorem, central limit theorem
% end
\documentclass[12pt,letterpaper]{article}
\include{macros}
\begin{document}
\lecture{11}{Background on Convergence in Distribution}{Lian Yu}{liany@ieor.berkeley.edu}

This topic is covered in section 2.2 of \cite{durrett}.

\section{Definition of convergence in distribution}
% keywords: convergence in distribution, weak convergence, weak $\star$ convergence
% end

\begin{definition}
Let $S$ be a metric space and let $\mathcal{S}$ be the Borel $\sigma$-field on $S$. Let $P_1,P_2,\ldots$ be a sequence of probability measures on $(S,\mathcal{S})$. Say that $P_n$ \emph{converges in distribution} to a probability measure $P$ on $(S,\mathcal{S})$, written $P_n\stackrel{d}{\longrightarrow}P$, if
\begin{align*}
\int fdP_n\rightarrow \int fdP
\end{align*}
for every bounded continuous function $f:S\rightarrow \R$.
\end{definition}

This type of convergence is also often called \emph{weak convergence} or \emph{weak-$\star$ convergence}. Note that weak limits are unique. This comes down to the fact that the collection of bounded continuous functions is a \emph{determining class}. That is,
\begin{align*}
\int fdP=\int fdQ \text{ for all bounded continuous $f$}
\end{align*}
implies $P(A)=Q(A)$ for all $A\in\mathcal{S}$.
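A standard example shows what convergence in distribution does and does not give. Take $P_n=\delta_{1/n}$, the point mass at $1/n$, and $P=\delta_0$. For every bounded continuous $f$,
\begin{align*}
\int fdP_n=f(1/n)\rightarrow f(0)=\int fdP,
\end{align*}
so $P_n\stackrel{d}{\longrightarrow}P$. On the other hand, $P_n(-\infty,0]=0$ for all $n$ while $P(-\infty,0]=1$: the distribution functions fail to converge at $x=0$, which is a discontinuity point of $x\mapsto P(-\infty,x]$. This is exactly why condition 3 of the theorem in the next section is restricted to continuity points of the limit.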
\section{Weak convergence of real random variables}
%keywords: weak convergence theorem
%end

\begin{theorem}
Let $P_n$, $n=1,2,\ldots$, and $P$ be probability measures on $\R$. The following are equivalent:
\begin{enumerate}
\item $\int fdP_n\rightarrow\int fdP$ for all bounded continuous $f$;
\item the same for all $C^\infty$ functions $f$ with all derivatives bounded;
\item $P_n(-\infty,x]\rightarrow P(-\infty,x]$ for all $x$ at which $x\mapsto P(-\infty,x]$ is continuous;
\item condition 1 for all bounded $f$ that are continuous except on a set of $P$-measure $0$; and
\item $\limsup_{n\rightarrow\infty}P_n(C)\leq P(C)$ for all closed $C$, or equivalently $\liminf_{n\rightarrow\infty}P_n(O)\geq P(O)$ for all open $O$.
\end{enumerate}
\end{theorem}

\begin{proof}
\emph{1 $\Rightarrow$ 3}: Define $f_{u,v}$ by
\begin{align*}
f_{u,v}(x)=
\begin{cases}
1 &\mbox{ if $x\leq u$}\\
0 &\mbox{ if $x\geq v$}\\
\mbox{linear}& \mbox{ if $u\leq x\leq v$.}
\end{cases}
\end{align*}
For $\epsilon>0$,
\begin{align*}
f_{x-\epsilon,x}\leq \1(-\infty,x]\leq f_{x,x+\epsilon}.
\end{align*}
Write $Pf$ for $\int fdP$. So if $P_n\stackrel{d}{\longrightarrow}P$, then
\begin{align*}
P_nf_{x-\epsilon,x}\leq P_n(-\infty,x]\leq P_nf_{x,x+\epsilon}.
\end{align*}
Letting $n\rightarrow\infty$,
\begin{align*}
Pf_{x-\epsilon,x}\leq \liminf_{n\rightarrow\infty}{P_n(-\infty,x]}\leq \limsup_{n\rightarrow\infty}{P_n(-\infty,x]}\leq Pf_{x,x+\epsilon}
\end{align*}
and
\begin{align*}
P(-\infty,x-\epsilon]\leq Pf_{x-\epsilon,x}\leq P(-\infty,x]\leq Pf_{x,x+\epsilon}\leq P(-\infty,x+\epsilon].
\end{align*}
Now assume $y\mapsto P(-\infty,y]$ is continuous at $y=x$. By taking $\epsilon$ sufficiently small, we can then make $Pf_{x-\epsilon,x}$ and $Pf_{x,x+\epsilon}$ as close as we like to $P(-\infty,x]$, and we conclude
\begin{align*}
\liminf_{n\rightarrow\infty}{P_n(-\infty,x]}=\limsup_{n\rightarrow\infty}P_n(-\infty,x]=P(-\infty,x].
\end{align*}

\emph{3 $\Rightarrow$ 1:} To show this we use another approximation. Condition 3 gives
\begin{equation}
P_nf\rightarrow Pf \mbox{ for $f=\1(-\infty,x]$ where $x$ is a continuity point of $x\mapsto P(-\infty,x]$.}
\label{thm21point3}
\end{equation}
First observe that the set of continuity points of $P$ is dense in $\R$, since $x\mapsto P(-\infty,x]$ has only countably many jumps. Second, note that we can extend (\ref{thm21point3}) from indicators to finite linear combinations of such indicators, i.e.\ to step functions whose jumps occur at continuity points of $P$. Now let $f$ be continuous and bounded in magnitude by $M$. Fix a target $\epsilon>0$ and choose $B$ so that $B$ and $-B$ are both continuity points of $x\mapsto P(-\infty,x]$ and $P((-B,B]^c)<\epsilon$. By (\ref{thm21point3}) there exists $n(\epsilon)$ such that $P_n((-B,B]^c)<2\epsilon$ for all $n\geq n(\epsilon)$. Next, choose a step function $s$, with jumps at continuity points of $P$, so that
\begin{align*}
|s(x)-f(x)|\leq\epsilon
\end{align*}
for all $x\in(-B,B]$ and $s=0$ outside $(-B,B]$ (this can be done by uniform continuity of $f$ on $[-B,B]$ and density of the continuity points). Then
\begin{equation}
|P_nf-P_ns|\leq 2\epsilon M+\epsilon
\label{thm21starstar}
\end{equation}
for $n\geq n(\epsilon)$: on $(-B,B]$ the integrands differ by at most $\epsilon$, and outside they differ by at most $M$ on a set of $P_n$-measure less than $2\epsilon$. (Note that our $s$ depends on $\epsilon$.) Choose $n$ even larger so that $|P_ns-Ps|\leq\epsilon$, which is possible since $s$ is covered by the step-function extension of (\ref{thm21point3}). Thus, by the triangle inequality,
\begin{align*}
|P_nf-Ps|\leq 2\epsilon M+2\epsilon.
\end{align*}
The same argument that gave (\ref{thm21starstar}) applies with $P$ in place of $P_n$, so $|Pf-Ps|\leq 2\epsilon M+\epsilon$. Putting it all together,
\begin{align*}
|P_nf-Pf|\leq 4\epsilon M+4\epsilon
\end{align*}
for all sufficiently large $n$.
\end{proof}

\section{Central Limit Theorem}
%keywords: central limit theorem
%end

\begin{theorem}
Let $X_1,X_2,\ldots$ be i.i.d.\ with $\E(X_n)=\mu$ and $\var(X_n)=\sigma^2<\infty$. If $S_n=X_1+\cdots+X_n$, then
\begin{align*}
\frac{S_n-n\mu}{\sigma\sqrt{n}}\stackrel{d}{\longrightarrow}N(0,1).
\end{align*}
\end{theorem}

\begin{proof}
The key observation is that if the $X_i$'s are normal, then $S_n$ is normal, and in fact
\begin{align*}
\frac{S_n-n\mu}{\sigma \sqrt{n}}\sim N(0,1)
\end{align*}
exactly, for every $n$. This comes down to the addition rule for independent normal variables: if $X\sim N(\mu,\sigma^2)$, $Y\sim N(\nu,\tau^2)$, and $X$ and $Y$ are independent, then $X+Y\sim N(\mu+\nu,\sigma^2+\tau^2)$.

We introduce some notation: $X\sim N(\mu,\sigma^2)$ means
\begin{align*}
P(X\leq x)=\int_{-\infty}^x\frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{1}{2}\frac{(y-\mu)^2}{\sigma^2}}dy\mbox{.}
\end{align*}
Define $\Phi(z)=\int_{-\infty}^z\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}y^2}dy$ and write $\phi=\Phi'$ for the standard normal density. Then
\begin{eqnarray*}
Z\sim N(0,1)&\Longleftrightarrow& P(Z\leq z)=\Phi(z),\\
X\sim N(\mu,\sigma^2)&\Longleftrightarrow&\frac{X-\mu}{\sigma}\sim N(0,1)\\
&\Longleftrightarrow& P(X\leq x)=\Phi\left(\frac{x-\mu}{\sigma}\right)\\
&\Longleftrightarrow& P(X\in dx)=\frac{1}{\sigma}\phi\left(\frac{x-\mu}{\sigma}\right)dx.
\end{eqnarray*}
There are several ways to prove the addition rule:
\begin{enumerate}
\item use a transform (m.g.f.\ or c.f.);
\item hack it out by the convolution formula, completing the square inside the exponential (a worked computation follows this proof):
\begin{align*}
f_{X+Y}(z)=\int_{-\infty}^\infty f_X(x)f_Y(z-x)dx;
\end{align*}
or
\item use a geometric argument. Look at the case $X\sim N(0,1)$, $Y\sim N(0,1)$ independent:
\begin{align*}
P(X\in dx, Y\in dy)&=f_X(x)dxf_Y(y)dy\\
&=\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}x^2}\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}y^2}dxdy\\
&=\frac{1}{2\pi}e^{-\frac{1}{2}(x^2+y^2)}dxdy,
\end{align*}
which is invariant under rotations of the $(x,y)$ plane. Hence $X\cos\theta+Y\sin\theta\sim N(0,1)$ exactly, for every $\theta$. For $\sigma^2+\tau^2=1$, take $\cos\theta=\sigma$ and $\sin\theta=\tau$: then $\sigma X\sim N(0,\sigma^2)$, $\tau Y\sim N(0,\tau^2)$, and $\sigma X+\tau Y=X\cos\theta+Y\sin\theta\sim N(0,1)$, which is the addition rule in this case.
\end{enumerate}
\end{proof}
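To illustrate method 2 above, here is the completing-the-square computation in the simplest case: $X$ and $Y$ independent $N(0,1)$, where the addition rule asserts $X+Y\sim N(0,2)$. Using the identity $-\frac{1}{2}x^2-\frac{1}{2}(z-x)^2=-\left(x-\frac{z}{2}\right)^2-\frac{z^2}{4}$ and the Gaussian integral $\int_{-\infty}^\infty e^{-u^2}du=\sqrt{\pi}$,
\begin{align*}
f_{X+Y}(z)&=\int_{-\infty}^\infty \frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}x^2}\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}(z-x)^2}dx\\
&=\frac{1}{2\pi}e^{-\frac{z^2}{4}}\int_{-\infty}^\infty e^{-\left(x-\frac{z}{2}\right)^2}dx\\
&=\frac{1}{2\pi}e^{-\frac{z^2}{4}}\sqrt{\pi}=\frac{1}{\sqrt{2\pi}\sqrt{2}}e^{-\frac{1}{2}\frac{z^2}{2}},
\end{align*}
which is the $N(0,2)$ density, as the addition rule predicts. After centering, the general case is the same computation with more bookkeeping.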
\bibliographystyle{plain}
\bibliography{../books}

\end{document}